#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

import pycurl
import StringIO


def getPubmed(ids):
    """Download PubMed records for the given IDs and save them as XML.

    The IDs are POSTed to the NCBI EFetch endpoint and the response body is
    written to ``pubmed_<first>_<last>.xml`` in the current working
    directory, where ``<first>`` and ``<last>`` are the first and last
    entries of *ids*.

    Args:
        ids: Comma-separated PubMed IDs as a single string, e.g. "1,2,3".

    Raises:
        pycurl.error: If the transfer fails, or the server answers with an
            HTTP status of 400 or above (no output file is written then).
    """
    split_ids = ids.split(",")
    start = split_ids[0]
    end = split_ids[-1]

    # NCBI documents the E-utilities base URL as https; libcurl does not
    # follow redirects by default, so the plain-http URL is not reliable.
    url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    buf = StringIO.StringIO()
    c = pycurl.Curl()
    try:
        c.setopt(pycurl.POST, 1)
        c.setopt(c.URL, url)
        c.setopt(pycurl.HTTPPOST, [('db', 'pubmed'), ('id', ids), ('retmode', 'xml')])
        c.setopt(c.WRITEFUNCTION, buf.write)
        # Make HTTP errors raise instead of saving an error page to disk and
        # reporting it as a successful download.
        c.setopt(pycurl.FAILONERROR, True)
        c.perform()
        # The response is buffered first so the file is only created once
        # the whole transfer has succeeded.
        with open("pubmed_" + start + "_" + end + ".xml", "w") as outfile:
            outfile.write(buf.getvalue())
    finally:
        # Release the curl handle and the buffer even when the transfer or
        # the file write fails.
        c.close()
        buf.close()
    # Single-argument print(...) yields the same output under the Python 2
    # print statement and the Python 3 print function.
    print("Pubmed records from " + start + " to " + end + " has been successfully downloaded.")

if __name__ == '__main__':
    # Inclusive range of PubMed IDs to download.
    first_id = 14720001
    last_id = 14730000
    # A batch is flushed whenever the current ID is a multiple of this value,
    # so each output file ends on a round ID boundary.
    batch_size = 10000

    batch = []
    for i in range(first_id, last_id + 1):
        batch.append(str(i))
        if i % batch_size == 0:
            getPubmed(",".join(batch))
            batch = []
    # Flush a trailing partial batch. Joining the list avoids the trailing
    # comma that would otherwise yield an empty "end" ID in the file name,
    # and checking the list also covers an empty ID range.
    if batch:
        getPubmed(",".join(batch))
